Needs to be persistent for one uptime.
"""
import os
+import threading
import xen.lowlevel.xc
"""Dict of domain info indexed by domain id."""
domains = None
+
+
+ ## public:
def __init__(self):
# Hack alert. Python does not support mutual imports, but XendDomainInfo
# So we stuff the XendDomain instance (self) into xroot's components.
xroot.add_component("xen.xend.XendDomain", self)
self.domains = XendDomainDict()
+ self.refresh_lock = threading.Condition()
self.watchReleaseDomain()
self.refresh()
self.dom0_setup()
doms = self.list_sorted()
return map(lambda x: x.getName(), doms)
+
+ ## private:
+
def onReleaseDomain(self):
self.refresh()
def dom0_setup(self):
dom0 = self.domain_lookup(PRIV_DOMAIN)
- if not dom0:
- dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN))
- dom0.dom0_init_store()
dom0.dom0_enforce_vcpus()
if info.getDomid() in self.domains:
notify = False
self.domains[info.getDomid()] = info
- info.exportToDB()
- if notify:
- eserver.inject('xend.domain.create', [info.getName(),
- info.getDomid()])
+ #info.exportToDB()
+ #if notify:
+ # eserver.inject('xend.domain.create', [info.getName(),
+ # info.getDomid()])
def _delete_domain(self, domid, notify=True):
"""Remove a domain from the tables.
info = self.domains.get(domid)
if info:
del self.domains[domid]
- info.cleanup()
- info.delete()
+ info.cleanupDomain()
+ info.cleanupVm()
if notify:
eserver.inject('xend.domain.died', [info.getName(),
info.getDomid()])
def refresh(self):
"""Refresh domain list from Xen.
"""
- doms = self.xen_domains()
- for d in self.domains.values():
- info = doms.get(d.getDomid())
- if info:
- d.update(info)
- else:
- self._delete_domain(d.getDomid())
- for d in doms:
- if d not in self.domains:
- try:
- self.recreate_domain(doms[d])
- except:
- log.exception(
- "Failed to recreate information for domain %d. "
- "Destroying it in the hope of recovery.", d)
+ self.refresh_lock.acquire()
+ try:
+ doms = self.xen_domains()
+ for d in self.domains.values():
+ info = doms.get(d.getDomid())
+ if info:
+ d.update(info)
+ else:
+ self._delete_domain(d.getDomid())
+ for d in doms:
+ if d not in self.domains and not doms[d]['dying']:
try:
- xc.domain_destroy(dom = d)
+ self.recreate_domain(doms[d])
except:
- log.exception('Destruction of %d failed.', d)
+ if d == PRIV_DOMAIN:
+ log.exception(
+ "Failed to recreate information for domain "
+ "%d. Doing nothing except crossing my "
+ "fingers.", d)
+ else:
+ log.exception(
+ "Failed to recreate information for domain "
+ "%d. Destroying it in the hope of "
+ "recovery.", d)
+ try:
+ xc.domain_destroy(dom = d)
+ except:
+ log.exception('Destruction of %d failed.', d)
+ finally:
+ self.refresh_lock.release()
def update_domain(self, id):
else:
self._delete_domain(id)
+
+ ## public:
+
def domain_create(self, config):
"""Create a domain from a configuration.
return dominfo
def domain_configure(self, config):
- """Configure an existing domain. This is intended for internal
- use by domain restore and migrate.
+ """Configure an existing domain.
@param vmconfig: vm configuration
"""
- # We accept our configuration specified as ['config' [...]], which
- # some tools or configuration files may be using. For save-restore,
- # we use the value of XendDomainInfo.sxpr() directly, which has no
- # such item.
- nested = sxp.child_value(config, 'config')
- if nested:
- config = nested
- return XendDomainInfo.restore(config)
+ # !!!
+ raise XendError("Unsupported")
def domain_restore(self, src):
"""Restore a domain from file.
try:
fd = os.open(src, os.O_RDONLY)
- dominfo = XendCheckpoint.restore(self, fd)
+ dominfo = XendCheckpoint.restore(fd)
self._add_domain(dominfo)
return dominfo
except OSError, ex:
SHUTDOWN_TIMEOUT = 30
-DOMROOT = '/domain'
-VMROOT = '/domain'
+DOMROOT = '/local/domain/'
+VMROOT = '/vm/'
xc = xen.lowlevel.xc.new()
]
+def restore(config):
+ """Create a domain and a VM object to do a restore.
+
+ @param config: domain configuration
+ """
+
+ log.debug("XendDomainInfo.restore(%s)", config)
+
+ try:
+ uuid = sxp.child_value(config, 'uuid')
+ ssidref = int(sxp.child_value(config, 'ssidref'))
+ except TypeError, exn:
+ raise VmError('Invalid ssidref in config: %s' % exn)
+
+ vm = XendDomainInfo(uuid, XendDomainInfo.parseConfig(config),
+ xc.domain_create(ssidref = ssidref))
+ vm.storeVmDetails()
+ vm.configure()
+ vm.create_channel()
+# vm.exportToDB()
+# vm.refreshShutdown()
+ vm.storeDomDetails()
+ return vm
+
+
def domain_exists(name):
# See comment in XendDomain constructor.
xd = get_component('xen.xend.XendDomain')
@raise: VmError for invalid configuration
"""
- log.debug("XendDomainInfo.create(...)")
+ log.debug("XendDomainInfo.create(%s)", config)
vm = cls(getUuid(), cls.parseConfig(config))
vm.construct()
def recreate(cls, xeninfo):
- """Create the VM object for an existing domain."""
+ """Create the VM object for an existing domain. The domain must not
+ be dying, as the paths in the store should already have been removed,
+ and asking us to recreate them causes problems."""
log.debug("XendDomainInfo.recreate(%s)", xeninfo)
+ assert not xeninfo['dying']
+
domid = xeninfo['dom']
try:
dompath = GetDomainPath(domid)
raise XendError(
'No vm/uuid path in store for existing domain %d' % domid)
- except Exception, exn:
- log.warn(str(exn))
- uuid = getUuid()
-
- log.info("Recreating domain %d, uuid %s", domid, uuid)
-
- vm = cls(uuid, xeninfo, domid, True)
- vm.refreshShutdown(xeninfo)
- return vm
-
- recreate = classmethod(recreate)
+ log.info("Recreating domain %d, UUID %s.", domid, uuid)
+ vm = cls(uuid, xeninfo, domid, True)
- def restore(cls, config, uuid = None):
- """Create a domain and a VM object to do a restore.
-
- @param config: domain configuration
- @param uuid: uuid to use
- """
-
- log.debug("XendDomainInfo.restore(%s, %s)", config, uuid)
+ except Exception, exn:
+ log.warn(str(exn))
- if not uuid:
uuid = getUuid()
- try:
- ssidref = int(sxp.child_value(config, 'ssidref'))
- except TypeError, exn:
- raise VmError('Invalid ssidref in config: %s' % exn)
+ log.info("Recreating domain %d with new UUID %s.", domid, uuid)
+
+ vm = cls(uuid, xeninfo, domid, True)
+ vm.storeVmDetails()
+ vm.storeDomDetails()
- vm = cls(uuid, cls.parseConfig(config),
- xc.domain_create(ssidref = ssidref))
vm.create_channel()
- vm.configure()
- vm.exportToDB()
- vm.refreshShutdown()
+ if domid == 0:
+ vm.initStoreConnection()
+
+ vm.refreshShutdown(xeninfo)
return vm
- restore = classmethod(restore)
+ recreate = classmethod(recreate)
def parseConfig(cls, config):
self.uuid = uuid
self.info = info
- self.path = DOMROOT + "/" + uuid
-
if domid:
self.domid = domid
elif 'dom' in info:
else:
self.domid = None
+ self.vmpath = VMROOT + uuid
+ if self.domid is None:
+ self.dompath = None
+ else:
+ self.dompath = DOMROOT + str(self.domid)
+
if augment:
self.augmentInfo()
self.state = STATE_VM_OK
self.state_updated = threading.Condition()
-
- self.writeVm("uuid", self.uuid)
- self.storeDom("vm", self.path)
+ self.refresh_shutdown_lock = threading.Condition()
def augmentInfo(self):
self.info[name] = val
params = (("name", str),
- ("restart-mode", str),
+ ("restart_mode", str),
("image", str),
- ("start-time", float))
+ ("start_time", float))
from_store = self.gatherVm(*params)
map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
+ device = []
+ for c in controllerClasses:
+ devconfig = self.getDeviceConfigurations(c)
+ if devconfig:
+ device.extend(map(lambda x: (c, x), devconfig))
+
+ useIfNeeded('device', device)
+
def validateInfo(self):
"""Validate and normalise the info block. This has either been parsed
# mem_kb.
def discard_negatives(name):
- if self.infoIsSet(name) and self.info[name] <= 0:
+ if self.infoIsSet(name) and self.info[name] < 0:
del self.info[name]
def valid_KiB_(mb_name, kb_name):
def valid_KiB(mb_name, kb_name):
result = valid_KiB_(mb_name, kb_name)
- if result <= 0:
+ if result is None or result < 0:
raise VmError('Invalid %s / %s: %s' %
(mb_name, kb_name, result))
else:
def readVm(self, *args):
- return xstransact.Read(self.path, *args)
+ return xstransact.Read(self.vmpath, *args)
def writeVm(self, *args):
- return xstransact.Write(self.path, *args)
+ return xstransact.Write(self.vmpath, *args)
def removeVm(self, *args):
- return xstransact.Remove(self.path, *args)
+ return xstransact.Remove(self.vmpath, *args)
def gatherVm(self, *args):
- return xstransact.Gather(self.path, *args)
+ return xstransact.Gather(self.vmpath, *args)
def storeVm(self, *args):
- return xstransact.Store(self.path, *args)
+ return xstransact.Store(self.vmpath, *args)
def readDom(self, *args):
- return xstransact.Read(self.path, *args)
+ return xstransact.Read(self.dompath, *args)
def writeDom(self, *args):
- return xstransact.Write(self.path, *args)
+ return xstransact.Write(self.dompath, *args)
def removeDom(self, *args):
- return xstransact.Remove(self.path, *args)
+ return xstransact.Remove(self.dompath, *args)
def gatherDom(self, *args):
- return xstransact.Gather(self.path, *args)
+ return xstransact.Gather(self.dompath, *args)
def storeDom(self, *args):
- return xstransact.Store(self.path, *args)
+ return xstransact.Store(self.dompath, *args)
- def exportToDB(self):
+ def storeVmDetails(self):
to_store = {
- 'domid': str(self.domid),
'uuid': self.uuid,
- 'xend/restart_mode': str(self.info['restart_mode']),
+ # !!!
+ 'memory/target': str(self.info['memory_KiB'])
+ }
+
+ if self.infoIsSet('image'):
+ to_store['image'] = sxp.to_string(self.info['image'])
+
+ for k in ['name', 'ssidref', 'restart_mode']:
+ if self.infoIsSet(k):
+ to_store[k] = str(self.info[k])
+
+ log.debug("Storing VM details: %s" % str(to_store))
+
+ self.writeVm(to_store)
+
+
+ def storeDomDetails(self):
+ to_store = {
+ 'domid': str(self.domid),
+ 'vm': self.vmpath,
'memory/target': str(self.info['memory_KiB'])
}
if v:
to_store[k] = str(v)
- to_store['image'] = sxp.to_string(self.info['image'])
-
- log.debug("Storing %s" % str(to_store))
+ log.debug("Storing domain details: %s" % str(to_store))
- self.writeVm(to_store)
+ self.writeDom(to_store)
def setDomid(self, domid):
def getName(self):
return self.info['name']
- def getPath(self):
- return self.path
+ def getDomainPath(self):
+ return self.dompath
def getUuid(self):
return self.uuid
def refreshShutdown(self, xeninfo = None):
- if xeninfo is None:
- xeninfo = dom_get(self.domid)
+ # If set at the end of this method, a restart is required, with the
+ # given reason. This restart has to be done out of the scope of
+ # refresh_shutdown_lock.
+ restart_reason = None
+
+ self.refresh_shutdown_lock.acquire()
+ try:
if xeninfo is None:
- # The domain no longer exists. This will occur if we have
- # scheduled a timer to check for shutdown timeouts and the
- # shutdown succeeded.
+ xeninfo = dom_get(self.domid)
+ if xeninfo is None:
+ # The domain no longer exists. This will occur if we have
+ # scheduled a timer to check for shutdown timeouts and the
+ # shutdown succeeded. It will also occur if someone
+ # destroys a domain beneath us. We clean up, just in
+ # case.
+ self.cleanupDomain()
+ self.cleanupVm()
+ return
+
+ if xeninfo['dying']:
+ # Dying means that a domain has been destroyed, but has not
+ # yet been cleaned up by Xen. This could persist indefinitely
+ # if, for example, another domain has some of its pages
+ # mapped. We might like to diagnose this problem in the
+ # future, but for now all we do is make sure that it's not
+ # us holding the pages, by calling the cleanup methods.
+ self.cleanupDomain()
+ self.cleanupVm()
return
- if xeninfo['dying']:
- # Dying means that a domain has been destroyed, but has not yet
- # been cleaned up by Xen. This could persist indefinitely if,
- # for example, another domain has some of its pages mapped.
- # We might like to diagnose this problem in the future, but for
- # now all we can sensibly do is ignore it.
- pass
+ elif xeninfo['crashed']:
+ log.warn('Domain has crashed: name=%s id=%d.',
+ self.info['name'], self.domid)
- elif xeninfo['crashed']:
- log.warn('Domain has crashed: name=%s id=%d.',
- self.info['name'], self.domid)
+ if xroot.get_enable_dump():
+ self.dumpCore()
- if xroot.get_enable_dump():
- self.dumpCore()
+ restart_reason = 'crashed'
- self.maybeRestart('crashed')
+ elif xeninfo['shutdown']:
+ reason = shutdown_reason(xeninfo['shutdown_reason'])
- elif xeninfo['shutdown']:
- reason = shutdown_reason(xeninfo['shutdown_reason'])
+ log.info('Domain has shutdown: name=%s id=%d reason=%s.',
+ self.info['name'], self.domid, reason)
- log.info('Domain has shutdown: name=%s id=%d reason=%s.',
- self.info['name'], self.domid, reason)
+ self.clearRestart()
- self.clearRestart()
+ if reason == 'suspend':
+ self.state_set(STATE_VM_SUSPENDED)
+ # Don't destroy the domain. XendCheckpoint will do this
+ # once it has finished.
+ elif reason in ['poweroff', 'reboot']:
+ restart_reason = reason
+ else:
+ self.destroy()
- if reason == 'suspend':
- self.state_set(STATE_VM_SUSPENDED)
- # Don't destroy the domain. XendCheckpoint will do this once
- # it has finished.
- elif reason in ['poweroff', 'reboot']:
- self.maybeRestart(reason)
else:
- self.destroy()
+ # Domain is alive. If we are shutting it down, then check
+ # the timeout on that, and destroy it if necessary.
+
+ sst = self.readDom('xend/shutdown_start_time')
+ if sst:
+ sst = float(sst)
+ timeout = SHUTDOWN_TIMEOUT - time.time() + sst
+ if timeout < 0:
+ log.info(
+ "Domain shutdown timeout expired: name=%s id=%s",
+ self.info['name'], self.domid)
+ self.destroy()
+ else:
+ log.debug(
+ "Scheduling refreshShutdown on domain %d in %ds.",
+ self.domid, timeout)
+ scheduler.later(timeout, self.refreshShutdown)
+ finally:
+ self.refresh_shutdown_lock.release()
- else:
- # Domain is alive. If we are shutting it down, then check
- # the timeout on that, and destroy it if necessary.
-
- sst = self.readVm('xend/shutdown_start_time')
- if sst:
- sst = float(sst)
- timeout = SHUTDOWN_TIMEOUT - time.time() + sst
- if timeout < 0:
- log.info(
- "Domain shutdown timeout expired: name=%s id=%s",
- self.info['name'], self.domid)
- self.destroy()
- else:
- log.debug(
- "Scheduling refreshShutdown on domain %d in %ds.",
- self.domid, timeout)
- scheduler.later(timeout, self.refreshShutdown)
+ if restart_reason:
+ self.maybeRestart(restart_reason)
def shutdown(self, reason):
if not reason in shutdown_reasons.values():
raise XendError('invalid reason:' + reason)
- self.storeVm("control/shutdown", reason)
+ self.storeDom("control/shutdown", reason)
if not reason == 'suspend':
- self.storeVm('xend/shutdown_start_time', time.time())
+ self.storeDom('xend/shutdown_start_time', time.time())
def clearRestart(self):
- self.removeVm("xend/shutdown_start_time")
+ self.removeDom("xend/shutdown_start_time")
def maybeRestart(self, reason):
"""Close the given channel, if set, and remove the given entry in the
store. Nothrow guarantee."""
+ if channel:
+ channel.close()
try:
- try:
- if channel:
- channel.close()
- finally:
- self.removeDom(entry)
+ self.removeDom(entry)
except Exception, exn:
log.exception(exn)
## private:
+ def getDeviceConfigurations(self, deviceClass):
+ return self.getDeviceController(deviceClass).configurations()
+
+
def getDeviceController(self, name):
if name not in controllerClasses:
raise XendError("unknown device type: " + str(name))
def construct(self):
- """Construct the vm instance from its configuration.
+ """Construct the domain.
- @param config: configuration
@raise: VmError on error
"""
self.info['name'])
try:
+ self.dompath = DOMROOT + str(self.domid)
+
self.initDomain()
self.construct_image()
self.configure()
- self.exportToDB()
- except Exception, ex:
- # Catch errors, cleanup and re-raise.
- print 'Domain construction error:', ex
- import traceback
- traceback.print_exc()
+ self.storeVmDetails()
+ self.storeDomDetails()
+ except Exception:
+ log.exception('Domain construction failed')
self.destroy()
- raise
+ raise VmError('Creating domain failed: name=%s' %
+ self.info['name'])
def initDomain(self):
self.domid, self.info['name'], self.info['memory_KiB'])
- def configure_vcpus(self, vcpus):
+ def configure_vcpus(self):
d = {}
- for v in range(0, vcpus):
+ for v in range(0, self.info['vcpus']):
d["cpu/%d/availability" % v] = "online"
self.writeVm(d)
+
def construct_image(self):
"""Construct the boot image for the domain.
"""
self.create_channel()
self.image.createImage()
- self.exportToDB()
- if self.store_channel and self.store_mfn >= 0:
- IntroduceDomain(self.domid, self.store_mfn,
- self.store_channel.port1, self.path)
- # get the configured value of vcpus and update store
- self.configure_vcpus(self.info['vcpus'])
+# !!! self.exportToDB()
+ IntroduceDomain(self.domid, self.store_mfn,
+ self.store_channel.port1, self.dompath)
+ self.configure_vcpus()
## public:
- def delete(self):
- """Delete the vm's db.
- """
- try:
- xstransact.Remove(self.path, 'domid')
- except Exception, ex:
- log.warning("error in domain db delete: %s", ex)
-
-
- def cleanup(self):
- """Cleanup vm resources: release devices. Nothrow guarantee."""
+ def cleanupDomain(self):
+ """Cleanup domain resources; release devices. Idempotent. Nothrow
+ guarantee."""
self.state_set(STATE_VM_TERMINATED)
self.release_devices()
"XendDomainInfo.cleanup: image.destroy() failed.")
self.image = None
+ try:
+ self.removeDom()
+ except Exception:
+ log.exception("Removing domain path failed.")
- def destroy(self):
- """Cleanup vm and destroy domain. Nothrow guarantee."""
-
- log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
- self.cleanup()
+ def cleanupVm(self):
+ """Cleanup VM resources. Idempotent. Nothrow guarantee."""
try:
self.removeVm()
except Exception:
log.exception("Removing VM path failed.")
- try:
- self.removeDom()
- except Exception:
- log.exception("Removing domain path failed.")
+ def destroy(self):
+ """Cleanup VM and destroy domain. Nothrow guarantee."""
+
+ log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
+
+ self.cleanupDomain()
+ self.cleanupVm()
+
try:
if self.domid is not None:
xc.domain_destroy(dom=self.domid)
"""
return self.state == STATE_VM_TERMINATED
+
def release_devices(self):
- """Release all vm devices. Nothrow guarantee."""
+ """Release all domain's devices. Nothrow guarantee."""
while True:
- t = xstransact("%s/device" % self.path)
+ t = xstransact("%s/device" % self.dompath)
for n in controllerClasses.keys():
for d in t.list(n):
try:
if t.commit():
break
+
def eventChannel(self, path=None):
"""Create an event channel to the domain.
try:
port = int(self.readDom(path))
except:
- # if anything goes wrong, assume the port was not yet set
+ # The port is not yet set, i.e. the channel has not yet been
+ # created.
pass
ret = channel.eventChannel(0, self.domid, port1=port, port2=0)
+
+ # Stale port information from above causes an Invalid Argument to be
+ # thrown by the eventChannel call below. To recover, we throw away
+ # port if it turns out to be bad, and just create a new channel.
+ # If creating a new channel with two new ports fails, then something
+ # else is going wrong, so we bail.
+ while True:
+ try:
+ ret = channel.eventChannel(0, self.domid, port1 = port,
+ port2 = 0)
+ break
+ except:
+ log.exception("Exception in eventChannel(0, %d, %d, %d)",
+ self.domid, port, 0)
+ if port == 0:
+ raise
+ else:
+ port = 0
+ log.error("Recovering from above exception.")
self.storeDom(path, ret.port1)
return ret
"""Restart the domain after it has exited. """
# self.restart_check()
- self.cleanup()
config = self.sxpr()
+ self.cleanupDomain()
+
if self.readVm('xend/restart_in_progress'):
log.error('Xend failed during restart of domain %d. '
'Refusing to restart to avoid loops.',
self.storeVm("cpu/%d/availability" % vcpu, availability)
def send_sysrq(self, key=0):
- self.storeVm("control/sysrq", '%c' % key)
+ self.storeDom("control/sysrq", '%c' % key)
- def dom0_init_store(self):
- if not self.store_channel:
- self.store_channel = self.eventChannel("store/port")
- if not self.store_channel:
- return
+
+ def initStoreConnection(self):
ref = xc.init_store(self.store_channel.port2)
if ref and ref >= 0:
self.setStoreRef(ref)
try:
IntroduceDomain(self.domid, ref, self.store_channel.port1,
- self.path)
+ self.dompath)
except RuntimeError, ex:
if ex.args[0] == errno.EISCONN:
pass
else:
raise
- # get run-time value of vcpus and update store
- self.configure_vcpus(dom_get(self.domid)['vcpus'])
+ self.configure_vcpus()
+
def dom0_enforce_vcpus(self):
dom = 0